package com.html2rss.core.parser;

import org.htmlparser.util.NodeList;
import org.htmlparser.util.ParserException;
import org.htmlparser.util.EncodingChangeException;
import org.htmlparser.NodeFilter;

/**
 * An {@link org.htmlparser.Parser} whose {@link #parse(NodeFilter)} restarts
 * parsing from the beginning when the base parser throws an
 * {@link EncodingChangeException}, instead of propagating it on the first
 * occurrence.
 *
 * User: vvs
 * Date: 31.03.2007
 * Time: 14:13:34
 */
public class Parser extends org.htmlparser.Parser{

    /**
     * Upper bound on how many times a single {@link #parse(NodeFilter)} call
     * restarts after an {@link EncodingChangeException}. Without a bound, input
     * that keeps triggering encoding changes would retry forever.
     */
    private static final int MAX_ENCODING_RETRIES = 5;

    /**
     * Parses the current resource, restarting from the beginning when the base
     * parser signals an encoding change.
     *
     * @param filter passed unchanged to the base parser's {@code parse};
     *               {@code main} in this class passes {@code null}
     * @return the node list produced by the base parser
     * @throws ParserException if the base parser fails for any other reason, or
     *                         if an {@link EncodingChangeException} is still
     *                         thrown after {@code MAX_ENCODING_RETRIES} restarts
     */
    public NodeList parse(NodeFilter filter) throws ParserException {
        int retriesLeft = MAX_ENCODING_RETRIES;
        while (true) {
            try {
                return super.parse(filter);
            } catch (EncodingChangeException e) {
                if (retriesLeft <= 0) {
                    // Give up with the original exception rather than looping
                    // (previously: recursing) without end.
                    throw e;
                }
                retriesLeft--;
                // NOTE(review): presumably the base parser has already switched
                // to the new encoding by the time it throws, so rewinding with
                // reset() is sufficient and no explicit setEncoding() call is
                // needed -- confirm against the HTML Parser documentation.
                reset();
            }
        }
    }

    /**
     * Manual smoke test: builds a document of 100 list items, each holding a
     * table whose row and cell tags are never closed, parses it without a
     * filter and prints the resulting HTML to standard output.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        StringBuilder html = new StringBuilder("<body>");
        for (int i = 0; i < 100; i++) {
            html.append("<li><table><tr><td>1111111111111111</table>\n");
        }
        html.append("</body>");
        Parser parser = new Parser();
        try {
            parser.setResource(html.toString());
            System.out.println(parser.parse(null).toHtml());
        } catch (ParserException e) {
            e.printStackTrace();
        }
    }
}
